
set more off
ssc install expandby, replace

* read in the raw crop database
use "$dir\datasets\crop_database_raw.dta", clear

** cleaning **
replace production=0 if production==.
replace area=0 if area==.
gen season2=subinstr(season, " ", "", .)
drop season
rename season2 season
gen crop2=subinstr(crop, " ", "", .)
drop crop
rename crop2 crop

** fix some typos
replace crop="arhar/tur" if crop=="arhartur"
replace crop="castorseed" if crop=="castorneed"

order state_name state_code dist_name dist2 dist_code crop year season
** analysis uses 2001 codes for NREGA roll-out, but crop database has some newer districts, need to consolidate those
bys state_name dist_name crop year season: egen area_tot=total(area)
bys state_name dist_name crop year season: egen production_tot=total(production)

drop yield area production
rename area_tot area
rename production_tot production

* drop the duplicates
drop dist2
duplicates drop

* turn years into numbers
split year, p("-")
destring year1, replace
drop year2
drop year
rename year1 year
order state_name state_code dist_name dist_code crop year season

bys state_name dist_name crop year: egen number=count(season)
gen area_year=.
replace area_year=area if season=="Total"
replace area_year=area if season=="WholeYear"
replace area_year=area if number==1 & season!="WholeYear"
bys state_name dist_name crop year: egen number2=count(area_year)
drop number2

gen production_year=.
replace production_year=production if season=="Total"
replace production_year=production if season=="WholeYear"
replace production_year=production if number==1 & season!="WholeYear"
bys state_name dist_name crop year: egen number2=count(production_year)
drop number2

save "$dir\datasets\crop_database_cleaned.dta", replace


keep state_name state_code dist_name dist_code crop season
duplicates drop
count
gen id=_n
* create one observation for every year
expandby 9, by(id)
count
gen year=.
bys id: replace year=2001 if _n==1
bys id: replace year=2002 if _n==2
bys id: replace year=2003 if _n==3
bys id: replace year=2004 if _n==4
bys id: replace year=2005 if _n==5
bys id: replace year=2006 if _n==6
bys id: replace year=2007 if _n==7
bys id: replace year=2008 if _n==8
bys id: replace year=2009 if _n==9
drop id

merge 1:m state_name state_code dist_name dist_code crop season year using "$dir\datasets\crop_database_cleaned.dta"

drop number
bys state_name dist_name crop year: egen number=count(season)

replace area=0 if _merge==1
replace production=0 if _merge==1

replace area_year=area if season=="Total" & _merge==1
replace area_year=area if season=="WholeYear" & _merge==1
replace area_year=area if number==1 & season!="WholeYear" & _merge==1

replace production_year=production if season=="Total" & _merge==1
replace production_year=production if season=="WholeYear" & _merge==1
replace production_year=production if number==1 & season!="WholeYear" & _merge==1

drop _merge

********************************************************************************************************************
* yield and weighted mean and standard deviation of yield

gen yield_year=production_year/area_year
gen temp1 = yield_year*area_year
bys state_name dist_name year: egen temp2=total(temp1) 
bys state_name dist_name year: egen temp3=total(area_year) 
gen weighted_mean_year = temp2/temp3

gen temp4 = (yield_year - weighted_mean_year)*(yield_year - weighted_mean_year)*area_year
bys state_name dist_name year: egen temp5=total(temp4) 
gen temp6 = temp5/temp3
gen weighted_sd_year = sqrt(temp6)

drop temp1 temp2 temp3 temp4 temp5 temp6

replace yield_year=0 if area_year==0


gen cash_crop=0
replace cash_crop=1 if crop=="sugarcane" | crop=="cotton(lint)" | crop=="jute" | crop=="otheroilseeds" | crop=="tobacco" | crop=="groundnut" | crop=="rapeseed&mustard" | crop=="soyabean" | crop=="sunflower" | crop=="sesamum" | crop=="nigerseed" | crop=="safflower" | crop=="castorseed" | crop=="linseed" | crop=="coconut"

gen c_production_year=production_year if cash_crop==1
gen c_area_year = area_year if cash_crop==1
gen c_yield_year = c_production_year/c_area_year

bys state_name dist_name year: egen cash_production_year=total(c_production_year)
bys state_name dist_name year: egen cash_area_year=total(c_area_year)
gen cash_yield_year=cash_production_year/cash_area_year

replace c_yield_year=0 if c_area_year==0


merge m:1 state_code dist_code using "$dir\datasets\NSS dataset creation\nss_64_10\dist_phase_nregs.dta"
drop if _merge==2
drop _merge
tab state_code, gen(state1)
tab year, gen(year1)
gen state_dist=state_code*100+dist_code

sort state_code dist_code year season crop

preserve
keep if year==2005
keep c_production_year c_area_year c_yield_year production_year area_year yield_year state_name state_code dist_code crop season    
rename c_production_year c_production_year_2005
rename c_area_year c_area_year_2005
rename c_yield_year c_yield_year_2005
rename production_year production_year_2005
rename area_year area_year_2005
rename yield_year yield_year_2005
duplicates drop
save "$dir\datasets\baseline_2005_crop.dta", replace 
restore

merge m:1 state_name state_code dist_code crop season using "$dir\datasets\baseline_2005_crop.dta"
drop _merge

replace yield_year=0 if area_year==0

merge m:1 state_code dist_code using "$dir\datasets\annual_small_farmers_baseline.dta"
drop _merge

gen state_rank_norm_sq_2=state_rank_norm_2*state_rank_norm_2
gen nrega_r=small_farmers*nrega_pred_2
gen int_eff_2=nrega_pred_2*state_rank_norm_2
gen int_sq_2=nrega_pred_2*state_rank_norm_sq_2
gen int_r=int_eff_2*small_farmers
gen int_sq_r=int_sq_2*small_farmers

keep if year==2007
drop if state_code==3 | state_code==24 | state_code==10 | state_code==19 | state_code==20

tab crop, gen(crop1)

** generate district tag
cap drop dist_tag
egen dist_tag= tag(state_dist)

** 2005 farm harvest prices 
gen farmharvest_price_2005=.
replace farmharvest_price_2005=5442.5 if crop=="arecanut"
replace farmharvest_price_2005=1812.219 if crop=="arhar/tur"
replace farmharvest_price_2005=595.9877 if crop=="bajra"
replace farmharvest_price_2005=1256.429 if crop=="banana"
replace farmharvest_price_2005=698.9043 if crop=="barley"
replace farmharvest_price_2005=4090.739 if crop=="blackpepper"
replace farmharvest_price_2005=3584 if crop=="cashewnut"
replace farmharvest_price_2005=1334.538 if crop=="castorseed"
replace farmharvest_price_2005=6459.5 if crop=="coconut"
replace farmharvest_price_2005=4485.8 if crop=="cotton(lint)"
replace farmharvest_price_2005=3351.182 if crop=="drychillies"
replace farmharvest_price_2005=3058.013 if crop=="ginger"
replace farmharvest_price_2005=1834.795 if crop=="gram"
replace farmharvest_price_2005=1641.765 if crop=="groundnut"
replace farmharvest_price_2005=646.9888 if crop=="jowar"
replace farmharvest_price_2005=1059.966 if crop=="jute"
replace farmharvest_price_2005=1741.829 if crop=="linseed"
replace farmharvest_price_2005=568.9614 if crop=="maize"
replace farmharvest_price_2005=940.4444 if crop=="mesta"
replace farmharvest_price_2005=1438 if crop=="nigerseed"
replace farmharvest_price_2005=590.9624 if crop=="potato"
replace farmharvest_price_2005=522.9753 if crop=="ragi"
replace farmharvest_price_2005=1743.456 if crop=="rapeseed&mustard"
replace farmharvest_price_2005=952.8113 if crop=="rice"
replace farmharvest_price_2005=1380.118 if crop=="sannhamp" /* value for vegetables */
replace farmharvest_price_2005=2439.805 if crop=="sesamum"
replace farmharvest_price_2005=1097.183 if crop=="soyabean"
replace farmharvest_price_2005=292.25 if crop=="sugarcane"
replace farmharvest_price_2005=1291.419 if crop=="sunflower"
replace farmharvest_price_2005=434.3333 if crop=="tapioca"
replace farmharvest_price_2005=3403.596 if crop=="tobacco"
replace farmharvest_price_2005=2439.88 if crop=="turmeric"
replace farmharvest_price_2005=807.3784 if crop=="wheat"

************** Create district-wise weighted total, with farm prices as the weights ***********************
bys state_dist: egen wdist_prod = total(farmharvest_price_2005 * production_year)
bys state_dist: egen wdist_cash_prod = total(farmharvest_price_2005 * c_production_year)
bys state_dist: egen wdist_prod_2005 = total(farmharvest_price_2005 * production_year_2005)
bys state_dist: egen wdist_cash_prod_2005 = total(farmharvest_price_2005 * c_production_year_2005)


*** weighted average production and yield
egen num1 = total(production_year * farmharvest_price_2005), by(state_dist) 
egen den1 = total(farmharvest_price_2005), by(state_dist) 
gen wavprod = num1/den1

egen num2 = total(production_year_2005 * farmharvest_price_2005), by(state_dist) 
egen den2 = total(farmharvest_price_2005), by(state_dist) 
gen wavprod_2005 = num2/den2

egen num3 = total(c_production_year * farmharvest_price_2005), by(state_dist) 
egen den3 = total(farmharvest_price_2005), by(state_dist) 
gen c_wavprod = num3/den3

egen num4 = total(c_production_year_2005 * farmharvest_price_2005), by(state_dist) 
egen den4 = total(farmharvest_price_2005), by(state_dist) 
gen c_wavprod_2005 = num4/den4

drop num1 num2 num3 num4 den1 den2 den3 den4

egen num1 = total(yield_year * farmharvest_price_2005), by(state_dist) 
egen den1 = total(farmharvest_price_2005), by(state_dist) 
gen wavyield = num1/den1

egen num2 = total(yield_year_2005 * farmharvest_price_2005), by(state_dist) 
egen den2 = total(farmharvest_price_2005), by(state_dist) 
gen wavyield_2005 = num2/den2

egen num3 = total(c_yield_year * farmharvest_price_2005), by(state_dist) 
egen den3 = total(farmharvest_price_2005), by(state_dist) 
gen c_wavyield = num3/den3

egen num4 = total(c_yield_year_2005 * farmharvest_price_2005), by(state_dist) 
egen den4 = total(farmharvest_price_2005), by(state_dist) 
gen c_wavyield_2005 = num4/den4




